## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
## 
## Please cite as: 
## 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## 
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

Participant Information

Here, we examine participant demographics, such as their age and gender, in addition to information about their work industry and experience. Overall, our sample is highly skewed in terms of gender. A representative average participant would be a man who works in a relatively large technology company with some years of experience.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Selecting by ct

Covariate checks between conditions

# Histogram of participant age, faceted by experimental condition.
ggplot(pt_info, aes(x = age))+
  geom_histogram(bins = 30, aes(fill = Condition), alpha = 0.8)+
  labs(title = "Distribution of Participant Age",
       subtitle = "Age was relatively similar across conditions.",
       x = "Age",
       y = "Count")+
  facet_grid(Condition ~ .)+
  # Rotate facet strip labels so condition names read horizontally.
  theme(strip.text.y = element_text(angle = 0))

# Mean age per condition (covariate balance check across the four groups).
pt_info %>% 
  group_by(Condition) %>% 
  summarise(avg_age = mean(age))
## # A tibble: 4 × 2
##   Condition            avg_age
##   <chr>                  <dbl>
## 1 Man_Nontraditional      38.7
## 2 Man_Traditional         39.3
## 3 Woman_Nontraditional    41.4
## 4 Woman_Traditional       38.9
# Bar chart of reported participant gender, dodged by condition.
# scale_x_discrete(limits = ...) fixes the category order on the x axis.
ggplot(pt_info, aes(x = as.factor(gender)))+
  geom_bar(stat = "count", position= "dodge", aes(fill = Condition), alpha = 0.8)+
  scale_x_discrete(limits = c("Man", "Woman", "Non-binary / third gender", "Prefer not to say"))+
  labs(title = "Distribution of Participant Gender and Condition",
       subtitle = "Gender was relatively similar across conditions.",
       x = "Reported Gender",
       y = "Count")

# Top-10 industries by participant count, bars ordered as they appear in
# industry_df; subtitle reports the share of participants in Technology.
# NOTE(review): top_n() is superseded in current dplyr; slice_max(ct, n = 10)
# is the modern equivalent. Kept as-is to match the rendered output above.
industry_dist <- ggplot(industry_df %>% top_n(10) , aes(x = factor(industry, levels = industry), y  = ct))+
  geom_bar(stat = "identity", aes(fill = Condition), alpha = 0.8)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  labs(title = "Distribution of Participant Industry", 
       subtitle = paste0(
         # Share of participants working in Technology, as a whole-number percent.
         round(subset(industry_df, industry == "Technology")$ct / nrow(pt_info), 2) * 100,
         "% of participants work in Technology"  # fixed typo: was "Tehcnology"
       ),
       x = "Industry Area",
       y = "Count")
## Selecting by ct
# Company-size distribution; strip " employees" from the labels and impose an
# ordered factor so sizes plot from smallest to largest.
ggplot(pt_info, aes(x = factor(str_replace_all(company_size, " employees", ""),
                               levels = c("Less than 50",
                                          "50-100",
                                          "100-500",
                                          "500-1,000",
                                          "1,000+"))))+
  geom_bar(stat = "count", position = "dodge", aes(fill = Condition), alpha = 0.8)+
  labs(title = "Distribution of Participant Company Size",
       subtitle = "Number of employees",
       x = "Company Size",
       y = "Count")

# Work-experience distribution, ordered from least to most experience.
ggplot(pt_info, aes(x = factor(experience,
                               levels = c("Less than 1 year",
                                          "1-2 years",
                                          "3-5 years",
                                          "5-10 years",
                                          "10+ years"))))+
  geom_bar(stat = "count", aes(fill = Condition), position = "dodge", alpha = 0.8)+
  labs(title = "Distribution of Participant Work Experience",
       subtitle = "",
       x = "Years of Experience",
       y = "Count")

# Risk-aversion distribution (average response to the risk items), one facet
# per condition.
ggplot(pt_info, aes(x = risk))+
  geom_bar(stat = "count", aes(fill = Condition), alpha = 0.8)+
  labs(title = "Distribution of Participant Risk Aversion",
       x = "Average Response to Risk Items",
       y = "Count")+
  facet_grid(Condition ~.)+
  theme(strip.text.y = element_text(angle = 0))

Summary Stats

Here, we examine some basic information about how ratings for different applicants varied.

# Per-condition rating summary: mean, SD, and sample size.
# NOTE(review): "max"/"min" here are mean +/- 1 SD bands, not observed
# extremes — consider renaming to avoid confusion.
average_ratings <- analysis %>% 
  group_by(Condition) %>% 
  summarise(avg_rating = mean(rating),
            standard_dev = sd(rating),
            max = avg_rating+ standard_dev,
            min = avg_rating - standard_dev,
            n = n())
average_ratings
## # A tibble: 4 × 6
##   Condition            avg_rating standard_dev   max   min     n
##   <fct>                     <dbl>        <dbl> <dbl> <dbl> <int>
## 1 Man_Traditional            7.60         1.47  9.07  6.14   243
## 2 Man_Nontraditional         7.18         1.99  9.17  5.19   258
## 3 Woman_Nontraditional       7.21         1.79  9.01  5.42   242
## 4 Woman_Traditional          7.93         1.51  9.44  6.42   202
# Average rating by applicant gender, collapsing over education condition.
gender_ratings <- analysis %>% 
  group_by(applicant_gender) %>% 
  summarise(avg_rating = mean(rating),
            standard_dev = sd(rating))
gender_ratings
## # A tibble: 2 × 3
##   applicant_gender avg_rating standard_dev
##   <fct>                 <dbl>        <dbl>
## 1 man                    7.39         1.77
## 2 woman                  7.54         1.71
# Average rating by applicant education (traditional vs nontraditional),
# collapsing over gender condition.
education_ratings <- analysis %>% 
  group_by(applicant_education) %>% 
  summarise(avg_rating = mean(rating),
            standard_dev = sd(rating))
education_ratings
## # A tibble: 2 × 3
##   applicant_education avg_rating standard_dev
##   <fct>                    <dbl>        <dbl>
## 1 trad                      7.75         1.49
## 2 nontrad                   7.19         1.90
# Count-scaled density of ratings, overlaid by condition, with a reference
# line at the scale midpoint (5).
ggplot(analysis, aes(x = rating))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  #geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$rating), color = "orchid4")+
  #geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$rating), color = "yellowgreen")+
  # geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$rating), color = "deepskyblue", linewidth = 1.25)+
  #geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$rating), color = "salmon")+
  # Fix: color names must be quoted strings. The bare `lightgray` was an
  # object lookup, which errors at plot time unless a variable of that name
  # happens to be defined earlier in the document.
  geom_vline(xintercept = 5, color = "lightgray")+
  labs(title = "Distribution of Participant Ratings by Condition",
       #       subtitle = "Vertical lines indicates average rating",
       x = "Rating",
       y = "Count")

# Rating distribution per condition as faceted bar charts (legend hidden;
# facets already label the conditions).
ggplot(analysis, aes(x = rating))+
  geom_bar(stat = "count", aes(fill = Condition))+
  facet_grid(Condition ~ .)+
  labs(title = "Rating Distributions by Condition",
       x = "Rating",
       y = "Count")+
  theme(strip.text.y = element_text(angle = 0),
        legend.position = "none")

# Jittered scatter of individual ratings by condition.
ggplot(analysis, aes(y = rating, x = Condition))+
  geom_jitter(aes(color = Condition))+
  theme(legend.position = "none")+
  labs(title = "Distribution of Ratings by Condition",
       y = "Rating")

## technical scale
# Mean technical-scale rating per condition.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_technical= mean(technical_scale))
## # A tibble: 4 × 2
##   Condition            mean_technical
##   <fct>                         <dbl>
## 1 Man_Traditional                7.77
## 2 Man_Nontraditional             7.47
## 3 Woman_Nontraditional           7.24
## 4 Woman_Traditional              8.01
# Count-scaled density of technical-scale ratings with one vertical line per
# condition mean. Line colors are hand-picked; NOTE(review): confirm they
# correspond to the condition fill palette, since the legend is hidden.
ggplot(analysis, aes(x = technical_scale))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$technical_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$technical_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$technical_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$technical_scale), color = "salmon")+
  labs(title = "Distribution of Technical Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

## leadership scale
# Mean leadership-scale rating per condition.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_leadership= mean(leadership_scale))
## # A tibble: 4 × 2
##   Condition            mean_leadership
##   <fct>                          <dbl>
## 1 Man_Traditional                 6.36
## 2 Man_Nontraditional              6.13
## 3 Woman_Nontraditional            6.19
## 4 Woman_Traditional               6.49
# Count-scaled density with a vertical line at each condition mean.
ggplot(analysis, aes(x = leadership_scale))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$leadership_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$leadership_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$leadership_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$leadership_scale), color = "salmon")+
  labs(title = "Distribution of Leadership Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

## likeable scale
# Mean likeable-scale rating per condition.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_likeable = mean(likeable_scale))
## # A tibble: 4 × 2
##   Condition            mean_likeable
##   <fct>                        <dbl>
## 1 Man_Traditional               6.53
## 2 Man_Nontraditional            6.48
## 3 Woman_Nontraditional          6.94
## 4 Woman_Traditional             7.21
# Count-scaled density with a vertical line at each condition mean.
ggplot(analysis, aes(x = likeable_scale))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$likeable_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$likeable_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$likeable_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$likeable_scale), color = "salmon")+
  labs(title = "Distribution of Likeable Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

## learner scale
# Mean learner-scale rating per condition.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_learner = mean(learner_scale))
## # A tibble: 4 × 2
##   Condition            mean_learner
##   <fct>                       <dbl>
## 1 Man_Traditional              6.86
## 2 Man_Nontraditional           6.83
## 3 Woman_Nontraditional         6.85
## 4 Woman_Traditional            7.21
# Count-scaled density with a vertical line at each condition mean.
ggplot(analysis, aes(x = learner_scale))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$learner_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$learner_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$learner_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$learner_scale), color = "salmon")+
  labs(title = "Distribution of Learner Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

## education scale
# Mean education-scale rating per condition. Note the large traditional vs
# nontraditional gap visible in the output below.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_education = mean(education_scale))
## # A tibble: 4 × 2
##   Condition            mean_education
##   <fct>                         <dbl>
## 1 Man_Traditional                7.99
## 2 Man_Nontraditional             5.90
## 3 Woman_Nontraditional           5.82
## 4 Woman_Traditional              8.21
# Count-scaled density with a vertical line at each condition mean.
ggplot(analysis, aes(x = education_scale))+
  geom_density(stat = "count",aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$education_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$education_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$education_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$education_scale), color = "salmon")+
  labs(title = "Distribution of Education Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

## experience scale
# Mean experience-scale rating per condition.
analysis %>%
  group_by(Condition) %>% 
  summarise(mean_experience= mean(experience_scale))
## # A tibble: 4 × 2
##   Condition            mean_experience
##   <fct>                          <dbl>
## 1 Man_Traditional                 7.85
## 2 Man_Nontraditional              7.77
## 3 Woman_Nontraditional            7.40
## 4 Woman_Traditional               8.19
# Count-scaled density with a vertical line at each condition mean.
ggplot(analysis, aes(x = experience_scale))+
  geom_density(stat = "count", aes(fill = Condition), alpha = 0.5)+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Traditional")$experience_scale), color = "orchid4")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Traditional")$experience_scale), color = "yellowgreen")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Woman_Nontraditional")$experience_scale), color = "deepskyblue")+
  geom_vline(xintercept = mean(subset(analysis, Condition == "Man_Nontraditional")$experience_scale), color = "salmon")+
  labs(title = "Distribution of Experience Scale Ratings by Condition",
       subtitle = "Vertical lines indicate group means",
       x = "Rating",
       y = "Count")+
  theme(legend.position = "none")

Testing

# Copy the analysis data frame into a data.table so the models below can be
# fit with the d[, j] idiom.
d <- data.table(analysis)

No Covariates - Baseline Model

## anova testing
# One-way ANOVA: does mean rating differ across the four conditions?
summary(d[, aov(rating ~ Condition)])
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Condition     3   85.4  28.453   9.662 2.77e-06 ***
## Residuals   941 2771.2   2.945                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## follow-up pairwise t-tests, Bonferroni correction
## (comment previously said "holm", but p.adjust.method below is "bonferroni")
d[, pairwise.t.test(rating, Condition, p.adjust.method = "bonferroni")]
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  rating and Condition 
## 
##                      Man_Traditional Man_Nontraditional Woman_Nontraditional
## Man_Nontraditional   0.033           -                  -                   
## Woman_Nontraditional 0.070           1.000              -                   
## Woman_Traditional    0.279           2.1e-05            7.2e-05             
## 
## P value adjustment method: bonferroni
## linear model
# Baseline OLS: rating on condition dummies (Man_Traditional is the
# reference level, per the coefficient names in the output below).
m0 <- d[, lm(rating ~ Condition)]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m0)

# Breusch-Pagan test; a significant result motivates the robust SEs below.
bptest(m0)
## 
##  studentized Breusch-Pagan test
## 
## data:  m0
## BP = 20.076, df = 3, p-value = 0.0001637
# Heteroscedasticity-consistent (HC) covariance matrix, stored on the model
# object, then coefficient tests using those robust standard errors.
m0$vcovHC_ <- vcovHC(m0)
coefs_conditions <- coeftest(m0, vcov. = m0$vcovHC_)
coefs_conditions
## 
## t test of coefficients:
## 
##                                Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                    7.604938   0.094417 80.5462 < 2.2e-16 ***
## ConditionMan_Nontraditional   -0.426644   0.155866 -2.7373  0.006312 ** 
## ConditionWoman_Nontraditional -0.394194   0.149263 -2.6409  0.008405 ** 
## ConditionWoman_Traditional     0.325755   0.142221  2.2905  0.022214 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for the condition effects.
ci_conditions <- coefci(m0, vcov. = m0$vcovHC_)
ci_conditions
##                                     2.5 %     97.5 %
## (Intercept)                    7.41964581  7.7902307
## ConditionMan_Nontraditional   -0.73252792 -0.1207595
## ConditionWoman_Nontraditional -0.68712226 -0.1012667
## ConditionWoman_Traditional     0.04664698  0.6048626

Based on the previous tests, we can detect that there are significant differences between conditions. This result is shown in the ANOVA test, for which post hoc testing indicates differences between the Woman_Traditional condition and the conditions Man_Nontraditional and Woman_Nontraditional. We follow this testing with linear models to more clearly capture the quantitative differences between ratings.

Specifically, the baseline candidate received a rating, on average, of 7.6 (7.42, 7.79). In comparison, men with a nontraditional education were rated lower, by an average of -0.43 (-0.73, -0.12). Women with a nontraditional education were also rated lower than men with a traditional education, by an average of -0.39 (-0.69, -0.1). Finally, women with a traditional education were actually rated higher than men with a traditional education, by an average of 0.33 (0.05, 0.6).

Applicant Attributes

The previous analysis focused specifically on the conditions shown. We are also interested in the qualities that make up these conditions, namely the applicant_gender & applicant_education variables. This will give us a clearer statistical understanding of the contribution of each applicant attribute. From this point forward, we use linear models, rather than ANOVA and t-testing. This is to make the content more consistent with the techniques learned in the course.

# Model 1: decompose condition into applicant gender x applicant education.
m1 <- d[, lm(rating ~ applicant_gender * applicant_education)]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m1)

# Breusch-Pagan test remains significant, so robust SEs are used for m1 too.
bptest(m1)
## 
##  studentized Breusch-Pagan test
## 
## data:  m1
## BP = 20.076, df = 3, p-value = 0.0001637
# Robust (HC) standard errors and coefficient tests for m1.
m1$vcovHC_ <- vcovHC(m1)
coefs_applicants <- coeftest(m1, vcov. = m1$vcovHC_)
coefs_applicants
## 
## t test of coefficients:
## 
##                                                   Estimate Std. Error t value
## (Intercept)                                       7.604938   0.094417 80.5462
## applicant_genderwoman                             0.325755   0.142221  2.2905
## applicant_educationnontrad                       -0.426644   0.155866 -2.7373
## applicant_genderwoman:applicant_educationnontrad -0.293306   0.221295 -1.3254
##                                                   Pr(>|t|)    
## (Intercept)                                      < 2.2e-16 ***
## applicant_genderwoman                             0.022214 *  
## applicant_educationnontrad                        0.006312 ** 
## applicant_genderwoman:applicant_educationnontrad  0.185358    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for m1.
ci_applicants <- coefci(m1, vcov. = m1$vcovHC_)
ci_applicants
##                                                        2.5 %     97.5 %
## (Intercept)                                       7.41964581  7.7902307
## applicant_genderwoman                             0.04664698  0.6048626
## applicant_educationnontrad                       -0.73252792 -0.1207595
## applicant_genderwoman:applicant_educationnontrad -0.72759399  0.1409828

In this case, we observe a significant effect of gender overall, but in the opposite direction from what we expected. Women, on average, receive a higher rating than men (0.3257548 higher, p = 0.0222141). We also see an effect of education (p = 0.0063122). Specifically, the applicants with a nontraditional education background are rated 0.4266437 points lower than those with traditional educations. There is no interaction effect observed (p = 0.185358).

Add Risk Aversion

# Model 2: add participant risk aversion as a covariate.
# Dropped the redundant `data = analysis` argument: inside d[, ...] the
# formula already resolves against d, which is an unmodified copy of
# `analysis` (created above via data.table(analysis)), so the fit is
# identical and the call is now consistent with m0/m1.
m2 <- d[, lm(rating ~ applicant_gender * applicant_education + risk)]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m2)

# Breusch-Pagan remains significant, so robust SEs are used for m2 as well.
bptest(m2)
## 
##  studentized Breusch-Pagan test
## 
## data:  m2
## BP = 23.802, df = 4, p-value = 8.754e-05
# Robust (HC) standard errors and coefficient tests for m2.
m2$vcovHC_ <- vcovHC(m2)
coefs_risk <- coeftest(m2, vcov. = m2$vcovHC_)
coefs_risk
## 
## t test of coefficients:
## 
##                                                   Estimate Std. Error t value
## (Intercept)                                       7.859754   0.318447 24.6815
## applicant_genderwoman                             0.324235   0.142506  2.2752
## applicant_educationnontrad                       -0.429148   0.156220 -2.7471
## risk                                             -0.057923   0.068543 -0.8451
## applicant_genderwoman:applicant_educationnontrad -0.274705   0.223623 -1.2284
##                                                   Pr(>|t|)    
## (Intercept)                                      < 2.2e-16 ***
## applicant_genderwoman                             0.023116 *  
## applicant_educationnontrad                        0.006128 ** 
## risk                                              0.398287    
## applicant_genderwoman:applicant_educationnontrad  0.219594    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for m2.
ci_risk <- coefci(m2, vcov. = m2$vcovHC_)
ci_risk
##                                                        2.5 %     97.5 %
## (Intercept)                                       7.23480342  8.4847037
## applicant_genderwoman                             0.04456753  0.6039023
## applicant_educationnontrad                       -0.73572846 -0.1225674
## risk                                             -0.19243799  0.0765912
## applicant_genderwoman:applicant_educationnontrad -0.71356357  0.1641535

We add risk aversion scores to the model to capture individual differences in the amount of risk one may be comfortable taking on. It does not change the estimates from the previous model.

Add Participant Gender

# Model 3: add participant gender. factor(gender, levels = c("Man", "Woman"))
# coerces any other response to NA, so those rows are dropped by lm().
# Dropped the redundant `data = analysis` argument: the formula already
# resolves against d, an unmodified copy of `analysis`, so the fit is
# identical and the call is consistent with m0/m1.
m3 <- d[, lm(rating ~ applicant_gender * applicant_education + risk + 
               factor(gender, levels = c("Man", "Woman")))]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m3)

# Breusch-Pagan remains significant, so robust SEs are used for m3 as well.
bptest(m3)
## 
##  studentized Breusch-Pagan test
## 
## data:  m3
## BP = 25.67, df = 5, p-value = 0.0001034
# Robust (HC) standard errors and coefficient tests for m3.
m3$vcovHC_ <- vcovHC(m3)
coefs_gender <- coeftest(m3, vcov. = m3$vcovHC_)
coefs_gender
## 
## t test of coefficients:
## 
##                                                   Estimate Std. Error t value
## (Intercept)                                       7.798428   0.329388 23.6755
## applicant_genderwoman                             0.323980   0.143921  2.2511
## applicant_educationnontrad                       -0.452508   0.157190 -2.8787
## risk                                             -0.034876   0.071487 -0.4879
## factor(gender, levels = c("Man", "Woman"))Woman  -0.167853   0.141841 -1.1834
## applicant_genderwoman:applicant_educationnontrad -0.412117   0.227034 -1.8152
##                                                   Pr(>|t|)    
## (Intercept)                                      < 2.2e-16 ***
## applicant_genderwoman                             0.024617 *  
## applicant_educationnontrad                        0.004086 ** 
## risk                                              0.625764    
## factor(gender, levels = c("Man", "Woman"))Woman   0.236965    
## applicant_genderwoman:applicant_educationnontrad  0.069820 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for m3.
ci_gender <- coefci(m3, vcov. = m3$vcovHC_)
ci_gender
##                                                        2.5 %      97.5 %
## (Intercept)                                       7.15198074  8.44487570
## applicant_genderwoman                             0.04152415  0.60643520
## applicant_educationnontrad                       -0.76100515 -0.14401069
## risk                                             -0.17517558  0.10542334
## factor(gender, levels = c("Man", "Woman"))Woman  -0.44622758  0.11052127
## applicant_genderwoman:applicant_educationnontrad -0.85768850  0.03345497

Add Company Size

# Model 4: add participant company size as an unordered factor covariate.
# Dropped the redundant `data = analysis` argument: the formula already
# resolves against d, an unmodified copy of `analysis`, so the fit is
# identical and the call is consistent with m0/m1.
m4 <- d[, lm(rating ~ applicant_gender * applicant_education + risk + 
               factor(gender, levels = c("Man", "Woman"))+
               factor(company_size))]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m4)

# Breusch-Pagan remains significant, so robust SEs are used for m4 as well.
bptest(m4)
## 
##  studentized Breusch-Pagan test
## 
## data:  m4
## BP = 44.939, df = 9, p-value = 9.469e-07
# Robust (HC) standard errors and coefficient tests for m4.
m4$vcovHC_ <- vcovHC(m4)
coefs_company <- coeftest(m4, vcov. = m4$vcovHC_)
coefs_company
## 
## t test of coefficients:
## 
##                                                   Estimate Std. Error t value
## (Intercept)                                       7.892319   0.326257 24.1905
## applicant_genderwoman                             0.345778   0.141297  2.4472
## applicant_educationnontrad                       -0.458735   0.157777 -2.9075
## risk                                             -0.029151   0.072010 -0.4048
## factor(gender, levels = c("Man", "Woman"))Woman  -0.221264   0.144200 -1.5344
## factor(company_size)100-500 employees            -0.412427   0.159916 -2.5790
## factor(company_size)50-100 employees             -0.190632   0.225698 -0.8446
## factor(company_size)500-1,000 employees           0.182692   0.167270  1.0922
## factor(company_size)Less than 50 employees       -0.124910   0.154801 -0.8069
## applicant_genderwoman:applicant_educationnontrad -0.392527   0.231026 -1.6991
##                                                   Pr(>|t|)    
## (Intercept)                                      < 2.2e-16 ***
## applicant_genderwoman                             0.014588 *  
## applicant_educationnontrad                        0.003732 ** 
## risk                                              0.685710    
## factor(gender, levels = c("Man", "Woman"))Woman   0.125274    
## factor(company_size)100-500 employees             0.010065 *  
## factor(company_size)50-100 employees              0.398537    
## factor(company_size)500-1,000 employees           0.275036    
## factor(company_size)Less than 50 employees        0.419934    
## applicant_genderwoman:applicant_educationnontrad  0.089651 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for m4.
ci_company <- coefci(m4, vcov. = m4$vcovHC_)
ci_company
##                                                        2.5 %      97.5 %
## (Intercept)                                       7.25201096  8.53262719
## applicant_genderwoman                             0.06846953  0.62308620
## applicant_educationnontrad                       -0.76838504 -0.14908423
## risk                                             -0.17047712  0.11217566
## factor(gender, levels = c("Man", "Woman"))Woman  -0.50426929  0.06174064
## factor(company_size)100-500 employees            -0.72627600 -0.09857766
## factor(company_size)50-100 employees             -0.63358318  0.25231836
## factor(company_size)500-1,000 employees          -0.14558936  0.51097278
## factor(company_size)Less than 50 employees       -0.42872049  0.17890127
## applicant_genderwoman:applicant_educationnontrad -0.84593485  0.06088052

Add Scale Measures

# Model 5: add the six applicant-perception scale measures as covariates.
# Dropped the redundant `data = analysis` argument: the formula already
# resolves against d, an unmodified copy of `analysis`, so the fit is
# identical and the call is consistent with m0/m1.
m5 <- d[, lm(rating ~ applicant_gender * applicant_education + risk + 
               factor(gender, levels = c("Man", "Woman"))+
               factor(company_size) +
               technical_scale +
               leadership_scale +
               likeable_scale +
               learner_scale +
               education_scale +
               experience_scale)]

# test for heteroscedasticity to determine if use of robust standard errors is justified
par(mfrow = c(2, 2))
plot(m5)

# Breusch-Pagan remains significant, so robust SEs are used for m5 as well.
bptest(m5)
## 
##  studentized Breusch-Pagan test
## 
## data:  m5
## BP = 77.419, df = 15, p-value = 2.064e-10
# Robust (HC) standard errors and coefficient tests for m5.
m5$vcovHC_ <- vcovHC(m5)
coefs_measures <- coeftest(m5, vcov. = m5$vcovHC_)
coefs_measures
## 
## t test of coefficients:
## 
##                                                   Estimate Std. Error t value
## (Intercept)                                       1.839998   0.423262  4.3472
## applicant_genderwoman                             0.138368   0.108474  1.2756
## applicant_educationnontrad                       -0.063866   0.125553 -0.5087
## risk                                             -0.025684   0.056123 -0.4576
## factor(gender, levels = c("Man", "Woman"))Woman  -0.125652   0.115270 -1.0901
## factor(company_size)100-500 employees            -0.130250   0.114881 -1.1338
## factor(company_size)50-100 employees             -0.263907   0.190720 -1.3837
## factor(company_size)500-1,000 employees           0.098722   0.136123  0.7252
## factor(company_size)Less than 50 employees        0.113916   0.131731  0.8648
## technical_scale                                   0.361842   0.049502  7.3097
## leadership_scale                                  0.050171   0.039024  1.2856
## likeable_scale                                    0.031530   0.041445  0.7608
## learner_scale                                     0.059815   0.042350  1.4124
## education_scale                                   0.102940   0.026672  3.8595
## experience_scale                                  0.173337   0.043181  4.0142
## applicant_genderwoman:applicant_educationnontrad -0.062517   0.176297 -0.3546
##                                                   Pr(>|t|)    
## (Intercept)                                      1.536e-05 ***
## applicant_genderwoman                            0.2024309    
## applicant_educationnontrad                       0.6111043    
## risk                                             0.6473247    
## factor(gender, levels = c("Man", "Woman"))Woman  0.2759784    
## factor(company_size)100-500 employees            0.2571860    
## factor(company_size)50-100 employees             0.1667815    
## factor(company_size)500-1,000 employees          0.4684936    
## factor(company_size)Less than 50 employees       0.3874019    
## technical_scale                                  5.911e-13 ***
## leadership_scale                                 0.1989027    
## likeable_scale                                   0.4469881    
## learner_scale                                    0.1581781    
## education_scale                                  0.0001217 ***
## experience_scale                                 6.460e-05 ***
## applicant_genderwoman:applicant_educationnontrad 0.7229626    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust 95% confidence intervals for m5.
ci_measures <- coefci(m5, vcov. = m5$vcovHC_)
ci_measures
##                                                        2.5 %     97.5 %
## (Intercept)                                       1.00930305 2.67069214
## applicant_genderwoman                            -0.07452340 0.35125905
## applicant_educationnontrad                       -0.31027534 0.18254431
## risk                                             -0.13583055 0.08446298
## factor(gender, levels = c("Man", "Woman"))Woman  -0.35188147 0.10057833
## factor(company_size)100-500 employees            -0.35571588 0.09521517
## factor(company_size)50-100 employees             -0.63821419 0.11040101
## factor(company_size)500-1,000 employees          -0.16843324 0.36587632
## factor(company_size)Less than 50 employees       -0.14462047 0.37245181
## technical_scale                                   0.26468911 0.45899420
## leadership_scale                                 -0.02641826 0.12675943
## likeable_scale                                   -0.04980948 0.11287036
## learner_scale                                    -0.02330108 0.14293100
## education_scale                                   0.05059387 0.15528577
## experience_scale                                  0.08859102 0.25808349
## applicant_genderwoman:applicant_educationnontrad -0.40851757 0.28348338
# Regression table for models m1-m5 with heteroskedasticity-robust SEs.
# Build the SE list programmatically from each model's stored robust
# vcov matrix rather than writing out sqrt(diag(...)) five times.
model_list <- list(m1, m2, m3, m4, m5)
robust_se_list <- lapply(model_list, function(m) sqrt(diag(m$vcovHC_)))

stargazer(
  m1,
  m2,
  m3,
  m4,
  m5,
  type = "html",
  se = robust_se_list,
  header = FALSE,            # suppress the stargazer citation banner
  single.row = TRUE,         # coefficient and SE on the same line
  no.space = TRUE,           # drop blank rows between coefficients
  column.sep.width = "3pt",  # narrower column separation
  font.size = "small"        # smaller table font
)
## 
## <table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">rating</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">applicant_genderwoman</td><td>0.326<sup>**</sup> (0.142)</td><td>0.324<sup>**</sup> (0.143)</td><td>0.324<sup>**</sup> (0.144)</td><td>0.346<sup>**</sup> (0.141)</td><td>0.138 (0.108)</td></tr>
## <tr><td style="text-align:left">applicant_educationnontrad</td><td>-0.427<sup>***</sup> (0.156)</td><td>-0.429<sup>***</sup> (0.156)</td><td>-0.453<sup>***</sup> (0.157)</td><td>-0.459<sup>***</sup> (0.158)</td><td>-0.064 (0.126)</td></tr>
## <tr><td style="text-align:left">risk</td><td></td><td>-0.058 (0.069)</td><td>-0.035 (0.071)</td><td>-0.029 (0.072)</td><td>-0.026 (0.056)</td></tr>
## <tr><td style="text-align:left">factor(gender, levels = c("Man", "Woman"))Woman</td><td></td><td></td><td>-0.168 (0.142)</td><td>-0.221 (0.144)</td><td>-0.126 (0.115)</td></tr>
## <tr><td style="text-align:left">factor(company_size)100-500 employees</td><td></td><td></td><td></td><td>-0.412<sup>***</sup> (0.160)</td><td>-0.130 (0.115)</td></tr>
## <tr><td style="text-align:left">factor(company_size)50-100 employees</td><td></td><td></td><td></td><td>-0.191 (0.226)</td><td>-0.264 (0.191)</td></tr>
## <tr><td style="text-align:left">factor(company_size)500-1,000 employees</td><td></td><td></td><td></td><td>0.183 (0.167)</td><td>0.099 (0.136)</td></tr>
## <tr><td style="text-align:left">factor(company_size)Less than 50 employees</td><td></td><td></td><td></td><td>-0.125 (0.155)</td><td>0.114 (0.132)</td></tr>
## <tr><td style="text-align:left">technical_scale</td><td></td><td></td><td></td><td></td><td>0.362<sup>***</sup> (0.050)</td></tr>
## <tr><td style="text-align:left">leadership_scale</td><td></td><td></td><td></td><td></td><td>0.050 (0.039)</td></tr>
## <tr><td style="text-align:left">likeable_scale</td><td></td><td></td><td></td><td></td><td>0.032 (0.041)</td></tr>
## <tr><td style="text-align:left">learner_scale</td><td></td><td></td><td></td><td></td><td>0.060 (0.042)</td></tr>
## <tr><td style="text-align:left">education_scale</td><td></td><td></td><td></td><td></td><td>0.103<sup>***</sup> (0.027)</td></tr>
## <tr><td style="text-align:left">experience_scale</td><td></td><td></td><td></td><td></td><td>0.173<sup>***</sup> (0.043)</td></tr>
## <tr><td style="text-align:left">applicant_genderwoman:applicant_educationnontrad</td><td>-0.293 (0.221)</td><td>-0.275 (0.224)</td><td>-0.412<sup>*</sup> (0.227)</td><td>-0.393<sup>*</sup> (0.231)</td><td>-0.063 (0.176)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>7.605<sup>***</sup> (0.094)</td><td>7.860<sup>***</sup> (0.318)</td><td>7.798<sup>***</sup> (0.329)</td><td>7.892<sup>***</sup> (0.326)</td><td>1.840<sup>***</sup> (0.423)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>945</td><td>945</td><td>916</td><td>916</td><td>916</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.030</td><td>0.031</td><td>0.040</td><td>0.053</td><td>0.436</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.027</td><td>0.027</td><td>0.035</td><td>0.043</td><td>0.427</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>1.716 (df = 941)</td><td>1.716 (df = 940)</td><td>1.713 (df = 910)</td><td>1.706 (df = 906)</td><td>1.321 (df = 900)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>9.662<sup>***</sup> (df = 3; 941)</td><td>7.444<sup>***</sup> (df = 4; 940)</td><td>7.621<sup>***</sup> (df = 5; 910)</td><td>5.590<sup>***</sup> (df = 9; 906)</td><td>46.390<sup>***</sup> (df = 15; 900)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>